The Esteemed Creators: Juliette Seive, David Tran, Boris Boukhonine


Get data frames

source("../01 Data/package_loader.R", echo = FALSE)
## Loading required package: plyr
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Loading required package: jsonlite
## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:utils':
## 
##     View
## 
## Loading required package: RCurl
## Loading required package: bitops
## Loading required package: tidyr
## Loading required package: ggplot2
## Loading required package: ggthemes
## Warning: package 'ggthemes' was built under R version 3.1.3
## Loading required package: grid
source("../01 Data/dataframes.R", echo = TRUE)
## 
## > smoke_free <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"select * from smoke_free_states\""), 
## +     httpheader  .... [TRUNCATED] 
## 
## > smoke_free %>% tbl_df
## Source: local data frame [969 x 6]
## 
##            STATES YEAR
## 1         Montana 1997
## 2        Nebraska 1997
## 3          Nevada 1997
## 4   New Hampshire 1997
## 5      New Jersey 1997
## 6      New Mexico 1997
## 7        New York 1997
## 8  North Carolina 1997
## 9    North Dakota 1997
## 10           Ohio 1997
## ..            ...  ...
## Variables not shown: TYPE_OF_RESTRICTION (fctr), BARS (fctr), RESTAURANTS
##   (fctr), PRIVATE_WORKSITES (fctr)
## 
## > cig_tax <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"select * from cig_tax\""), 
## +     httpheader = c(DB = "jdb ..." ... [TRUNCATED] 
## 
## > cig_tax %>% tbl_df
## Source: local data frame [969 x 3]
## 
##                  STATES YEAR CIGARETTE_TAX_DOLLAR_PER_PACK
## 1               Arizona 2008                         2.000
## 2              Arkansas 2008                         0.590
## 3            California 2008                         0.870
## 4              Colorado 2008                         0.840
## 5           Connecticut 2008                         2.000
## 6              Delaware 2008                         1.150
## 7  District of Columbia 2008                         2.000
## 8               Florida 2008                         0.339
## 9               Georgia 2008                         0.370
## 10               Hawaii 2008                         2.000
## ..                  ...  ...                           ...
## 
## > lobby <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"select * from lobbying\""), 
## +     httpheader = c(DB = "jdbc ..." ... [TRUNCATED] 
## 
## > lobby %>% tbl_df
## Source: local data frame [644 x 6]
## 
##                          TRANSACTION_ID GENERAL_ISSUE_CODE GENERAL_ISSUE
## 1  77C94014-54E8-4421-80E4-28ADD94FCDF3                TOB       Tobacco
## 2  797AE79D-127B-4F1E-BE1F-AB198302703A                TOB       Tobacco
## 3  7B8AEFAC-0EBD-48F8-98FF-53C1DFE20BEE                TOB       Tobacco
## 4  8430E914-1339-40D6-8FA6-7A02AAAE9723                POS        Postal
## 5  8430E914-1339-40D6-8FA6-7A02AAAE9723                TAX         Taxes
## 6  8430E914-1339-40D6-8FA6-7A02AAAE9723                TOB       Tobacco
## 7  85C38A61-6E26-46B7-8AD5-F1ACD1DE5BD8                TOB       Tobacco
## 8  86D49DD6-011E-4266-B2A9-E64728B31F1A                TOB       Tobacco
## 9  8CCD47F0-4FB3-40C7-A86A-9FAE9B715E4C                POS        Postal
## 10 8CCD47F0-4FB3-40C7-A86A-9FAE9B715E4C                TAX         Taxes
## ..                                  ...                ...           ...
## Variables not shown: ID (int), YEAR (int), SERIALID (int)

Wrangle data

source("../02 Data Wrangling/datawrangling.R", echo = TRUE)
## 
## > join1 <- cig_tax %>% inner_join(smoke_free, by = c("YEAR", 
## +     "STATES"))
## 
## > mutated1 <- join1 %>% mutate(cig_tax_distribution = cume_dist(CIGARETTE_TAX_DOLLAR_PER_PACK))
## 
## > mutated1 %>% tbl_df
## Source: local data frame [969 x 8]
## 
##                  STATES YEAR CIGARETTE_TAX_DOLLAR_PER_PACK
## 1               Arizona 2008                         2.000
## 2              Arkansas 2008                         0.590
## 3            California 2008                         0.870
## 4              Colorado 2008                         0.840
## 5           Connecticut 2008                         2.000
## 6              Delaware 2008                         1.150
## 7  District of Columbia 2008                         2.000
## 8               Florida 2008                         0.339
## 9               Georgia 2008                         0.370
## 10               Hawaii 2008                         2.000
## ..                  ...  ...                           ...
## Variables not shown: TYPE_OF_RESTRICTION (fctr), BARS (fctr), RESTAURANTS
##   (fctr), PRIVATE_WORKSITES (fctr), cig_tax_distribution (dbl)
## 
## > join2 <- cig_tax %>% left_join(smoke_free, by = c("YEAR", 
## +     "STATES"))
## 
## > filter1 <- join2 %>% select(STATES, YEAR, CIGARETTE_TAX_DOLLAR_PER_PACK, 
## +     TYPE_OF_RESTRICTION) %>% filter(TYPE_OF_RESTRICTION != "No law, desi ..." ... [TRUNCATED] 
## 
## > mutated2 <- filter1 %>% group_by(STATES) %>% summarize(minyear = min(YEAR), 
## +     avg_tax = mean(CIGARETTE_TAX_DOLLAR_PER_PACK))
## 
## > mutated2 %>% tbl_df
## Source: local data frame [37 x 3]
## 
##                  STATES minyear   avg_tax
## 1               Arizona    2007 2.0000000
## 2              Arkansas    2006 0.9400000
## 3              Colorado    2006 0.8400000
## 4              Delaware    2002 1.0616667
## 5  District of Columbia    2006 2.0625000
## 6               Florida    2003 0.7935454
## 7                Hawaii    2006 2.5750000
## 8                 Idaho    2004 0.5700000
## 9              Illinois    2008 1.3133334
## 10              Indiana    2012 0.9950000
## ..                  ...     ...       ...
## 
## > join3 <- cig_tax %>% full_join(smoke_free, by = c("YEAR", 
## +     "STATES"))
## 
## > filter2 <- join3 %>% select(STATES, YEAR, CIGARETTE_TAX_DOLLAR_PER_PACK, 
## +     TYPE_OF_RESTRICTION) %>% filter(TYPE_OF_RESTRICTION == "No law, desi ..." ... [TRUNCATED] 
## 
## > filter2 %>% tbl_df
## Source: local data frame [104 x 4]
## 
##         STATES YEAR CIGARETTE_TAX_DOLLAR_PER_PACK
## 1      Alabama 2010                         0.425
## 2       Alaska 2010                         2.000
## 3   California 2010                         0.870
## 4  Connecticut 2010                         3.000
## 5      Georgia 2010                         0.370
## 6      Indiana 2010                         0.995
## 7     Kentucky 2010                         0.600
## 8  Mississippi 2010                         0.680
## 9     Missouri 2010                         0.170
## 10    Oklahoma 2010                         1.030
## ..         ...  ...                           ...
## Variables not shown: TYPE_OF_RESTRICTION (fctr)

Crosstabs




R Visualizations

source("../03 Visualization/visualizations.R", echo = TRUE)
## 
## > g2 <- mutated2 %>% ggplot(aes(x = STATES, y = minyear)) + 
## +     geom_point(aes(size = avg_tax, color = avg_tax)) + scale_size_continuous(range = c( .... [TRUNCATED] 
## 
## > g2

## 
## > g1 <- mutated1 %>% ggplot(aes(x = YEAR, y = cig_tax_distribution, 
## +     color = STATES)) + geom_point() + geom_line() + ggtitle("Cigarette Tax % by ..." ... [TRUNCATED] 
## 
## > g1


Visualization 3: Line plot of cigarette tax in states without smoking bans, 1995-2013




Visualization 4: Type of smoking restriction for each state for 1995-2013






Visualization 5: Arizona and New York, the two states highlighted in the previous visualization







Visualization 6




Visualization 7: Map of states with smoking bans, with average cigarette tax represented by point size, for even-numbered years from 2000 to 2010






Visualization 11




Visualization 12




Visualization 13




Visualization 14